import scrapy

# Create the Scrapy project: scrapy startproject myspider

# Create the spider: scrapy genspider baidu baidu.com  (run inside the innermost myspider directory)

# Run the spider: scrapy crawl baidu  (run inside the innermost myspider directory)



class BaiduSpider(scrapy.Spider):
    """Fetch the Baidu homepage and dump the raw HTML to ``baidu.html``."""

    name = "baidu"
    # Must be a list; entries are bare domains without a scheme (no "https://").
    allowed_domains = ["www.baidu.com"]
    # Must be complete URLs.
    start_urls = ["https://www.baidu.com"]

    def start_requests(self):
        """Yield the initial request(s) with browser-like headers and cookies.

        Baidu serves degraded/blocked responses to Scrapy's default
        user agent, so a realistic Chrome header set is sent along with
        cookies captured from a browser session.
        NOTE(review): the cookies below are session-bound and will
        eventually expire — refresh them from a live browser session
        when the spider stops returning the expected page.
        """
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
            "sec-ch-ua": "\"Chromium\";v=\"130\", \"Google Chrome\";v=\"130\", \"Not?A_Brand\";v=\"99\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\""
        }
        cookies = {
            "pcMainBoxRec": "1",
            "sug": "0",
            "sugstore": "0",
            "ORIGIN": "2",
            "bdime": "0",
            "BD_HOME": "1",
            "BD_UPN": "12314753",
            "BD_CK_SAM": "1",
            "shifen[672875141305_26418]": "1754225977",
            "baikeVisitId": "919d180f-7f69-45e5-893c-64cacfa83c80",
            "shifen[1180460674802_48285]": "1754310067",
            "COOKIE_SESSION": "71535_2_7_8_7_19_0_0_7_8_1_4_229499_84089_0_14_1754031726_1754310067_1754310053%7C9%2384079_135_1754310053%7C9",
            "PSTM": "1754447574",
            "BAIDUID": "F0AD8179EA5D98860DB27EC407F3F4E7:FG=1",
            "BIDUPSID": "FABDF80E3B448E1876B9C79BB6BCCDAF",
            "BA_HECTOR": "25a4018l21210k2l810184a18g81aj1k95fmm25",
            "BAIDUID_BFESS": "F0AD8179EA5D98860DB27EC407F3F4E7:FG=1",
            "ZFY": "ZMrlMEPqEsxNtK1qpMCdTW6528fGHsTOoqNhcC40b4E:C",
            "H_PS_PSSID": "60275_62325_63143_63326_63881_63948_64125_64164_64173_64182_64222_64219_64245_64247_64252_64259_64260_64271_64317_64359_64366_64363_64361_64372"
        }
        # Iterate start_urls instead of hard-coding the URL a second time,
        # keeping the class attribute as the single source of truth.
        for url in self.start_urls:
            yield scrapy.Request(
                url=url,
                headers=headers,
                cookies=cookies,
                callback=self.parse,
            )

    def parse(self, response):
        """Write the raw response body to ``baidu.html`` for inspection.

        Opened in binary mode ("wb") because ``response.body`` is bytes,
        preserving the server's original encoding.
        """
        with open('baidu.html', 'wb') as f:
            f.write(response.body)


